#####################
# RUN THE IV MODELS #
#####################
# Author: Kasia Nalewajko
# First created: 3 December 2021
# Replicated: 9 June 2024

rm(list = ls())

# LOAD PACKAGES -----------------------------------------------------------

if (!require("dplyr")) install.packages("dplyr")
if (!require("estimatr")) install.packages("estimatr")
if (!require("modelsummary")) install.packages("modelsummary")
if (!require("fixest")) install.packages("fixest")
if (!require("sf")) install.packages("sf")
if (!require("spdep")) install.packages("spdep")

# LOAD DATA ---------------------------------------------------------------

load("./00 SUBMITTED/00 APSR final/04 replication_files/01 data/main.Rda")

# RUN MODELS --------------------------------------------------------------

# Model with limited controls (columns 1 and 2) ----

PROPmlim_county <- fixest::feols(log(((all_jewish_victims+1)/(allocated_jpop+1)+1)) ~ log(pop1936+1) + log(allocated_jpop+1) + synagogues_dummy + log(collabos_antijew1000+1)+ log(DHI_milipol_sum1942+1) + area_sqkm + longitude + longitudesq + latitude + latitudesq | zone5 + as.factor(ar_name)
                                 | log((FFIFFCgentile*1000/pop1936)+1) ~ log(mpf1000+1),
                                 cluster = ~c(id_bureau, resregion2),
                                 data = main)

# extract additional statistics
finfo <- fitstat(PROPmlim_county, "ivf")
PROPmlim_county_fstat <- round(finfo[["ivf1::log((FFIFFCgentile * 1000/pop1936) + 1)"]][["stat"]])
PROPmlim_county_wh <- round(summary(PROPmlim_county)$iv_wh[["p"]], digits = 4)

# run Moran test and extract the statistic
cantons_sf <- sf::st_read("./00 SUBMITTED/00 APSR final/04 replication_files/01 data/Gay_CANTONS_1940/CANTONS_1940.shp")
obsRemoved <- abs(PROPmlim_county[["obs_selection"]][["obsRemoved"]])
cantons_sf <- cantons_sf[-obsRemoved,]
spatial_weights_matrix <- spdep::poly2nb(pl = cantons_sf, row.names = cantons_sf$deppct, queen = T)
spatial_weights_matrix <- nb2listw(spatial_weights_matrix, zero.policy = T) 
mlim_county_res <- residuals(PROPmlim_county)
PROPmlim_county_moran <- moran.test(x = mlim_county_res, listw = spatial_weights_matrix, zero.policy = T)
PROPmlim_county_moran <- round(PROPmlim_county_moran$estimate[[1]], d = 3)

# Fully specified model (columns 3 and 4) ----

PROPmcontrols_county <- fixest::feols(log(((all_jewish_victims+1)/(allocated_jpop+1)+1)) ~ log(pop1936+1) + log(allocated_jpop+1) + synagogues_dummy + log(collabos_antijew1000+1) + log(DHI_milipol_sum1942+1)  + catholic_churches + log(FRANCISTE_36_1_pct+1) + log(ActionFrancaise_19_pct+1) + turnout_36_1_pct + log(nuance_droite_36_1_pct+1) + log(nuance_centredroit_36_1_pct+1) + log(nuance_centregauche_36_1_pct+1) + log(nuance_gauche_36_1_pct+1) + log(nuance_extremegauche_36_1_pct+1) + area_sqkm + longitude + longitudesq + latitude + latitudesq | zone5 + as.factor(ar_name)
                                      | log((FFIFFCgentile*1000/pop1936)+1) ~ log(mpf1000+1),
                                      cluster = ~c(id_bureau, resregion2), 
                                      data = main)

# extract additional statistics
finfo <- fitstat(PROPmcontrols_county, "ivf")
PROPmcontrols_county_fstat <- round(finfo[["ivf1::log((FFIFFCgentile * 1000/pop1936) + 1)"]][["stat"]])
PROPmcontrols_county_wh <- round(summary(PROPmcontrols_county)$iv_wh[["p"]], digits = 4)

# run Moran test and extract the statistic
cantons_sf <- sf::st_read("./data/other/maps/france1940/CANTONS_1940/CANTONS_1940.shp")
obsRemoved <- abs(PROPmcontrols_county[["obs_selection"]][["obsRemoved"]])
cantons_sf <- cantons_sf[-obsRemoved,]
spatial_weights_matrix <- poly2nb(pl = cantons_sf, row.names = cantons_sf$deppct, queen = T)
spatial_weights_matrix <- nb2listw(spatial_weights_matrix, zero.policy = T) 
mcontrols_county_res <- residuals(PROPmcontrols_county)
PROPmcontrols_county_moran <- moran.test(x = mcontrols_county_res, listw = spatial_weights_matrix, zero.policy = T)
PROPmcontrols_county_moran <- round(PROPmcontrols_county_moran$estimate[[1]], d = 3)

# Preview ----

modelsummary(list(
  'First Stage' = PROPmlim_county$iv_first_stage[[1]],
  'Second Stage' = PROPmlim_county,
  'First Stage' = PROPmcontrols_county$iv_first_stage[[1]],
  'Second Stage' = PROPmcontrols_county),
  stars = c('*' = .1, '**' = .05, '***' = .01))

# OUTPUT TO LATEX ---------------------------------------------------------------

# additional rows
add_stats <- tibble::tribble(
  ~x, ~y, ~q, ~w, ~l,
  'F stat. (1st stage)', PROPmlim_county_fstat, PROPmlim_county_fstat, PROPmcontrols_county_fstat, PROPmcontrols_county_fstat, 
  'Moran stat.', PROPmlim_county_moran, PROPmlim_county_moran, PROPmcontrols_county_moran, PROPmcontrols_county_moran,
  'Wu-Hausman p-value', PROPmlim_county_wh, PROPmlim_county_wh, PROPmcontrols_county_wh, PROPmcontrols_county_wh
  )


attr(add_stats, 'position') <- c(48, 49, 50)

msummary(list(
  'First Stage' = PROPmlim_county$iv_first_stage[[1]],
  'Second Stage' = PROPmlim_county,
  'First Stage' = PROPmcontrols_county$iv_first_stage[[1]],
  'Second Stage' = PROPmcontrols_county
),
stars = c('*' = .1, '**' = .05, '***' = .01),
gof_omit = "AIC|BIC|RMSE|R2 Within|R2 Within Adj.",
coef_omit = "zone5",
add_rows = add_stats,
output = "latex",
coef_rename = c("log(mpf1000 + 1)" = "WWI military death rates",
                "fit_log((FFIFFCgentile * 1000/pop1936) + 1)" = "Insurgent presence",
                "log(pop1936 + 1)" = "1936 population",
                "log(collabos_antijew1000 + 1)" = "Collaborators",
                "log(DHI_milipol_sum1942 + 1)" = "1942 state presence",
                "synagogues_dummy" = "Synagogues",
                "area_sqkm" = "Area size (km2)",
                "longitude" = "Longitude",
                "latitude" = "Latitude",
                "longitudesq" = "Longitude (sq)",
                "latitudesq" = "Latitude (sq)",
                "log(allocated_jpop + 1)" = "1941 Jewish population",
                "catholic_churches" = "Catholic churches",
                "log(FRANCISTE_36_1_pct + 1)" = "Franciste vote 1936",
                "log(ActionFrancaise_19_pct + 1)" = "Action Française vote 1919",
                "turnout_36_1_pct" = "Turnout 1936",
                "log(nuance_droite_36_1_pct + 1)" = "Right vote 1936",
                "log(nuance_centredroit_36_1_pct + 1)" = "Centre-right vote 1936",
                "log(nuance_centregauche_36_1_pct + 1)" = "Centre-left vote 1936",
                "log(nuance_gauche_36_1_pct + 1)" = "Left vote 1936",
                "log(nuance_extremegauche_36_1_pct + 1)" = "Extreme left vote 1936"
),
title = "Main results. Dependent variable: Proportion Holocaust victims (logged)"
)




